#!/usr/bin/env python
# coding: utf-8

# In[ ]:


get_ipython().run_line_magic('matplotlib', 'inline')


# 
# Autograd: automatic differentiation
# ===================================
# 
# Central to all neural networks in PyTorch is the ``autograd`` package.
# Let’s first briefly visit this package, and we will then go on to training our
# first neural network.
# 
# 
# The ``autograd`` package provides automatic differentiation for all operations
# on Tensors. It is a define-by-run framework, which means that your backprop is
# defined by how your code is run, and that every single iteration can be
# different (a short sketch right after the ``import torch`` cell below
# illustrates this).
# 
# Let us see this in simpler terms with some examples.
# 
# Tensor
# --------
# 
# ``torch.Tensor`` is the central class of the package. If you set its attribute
# ``.requires_grad`` as ``True``, it starts to track all operations on it. When
# you finish your computation you can call ``.backward()`` and have all the
# gradients computed automatically. The gradient for this tensor will be
# accumulated into the ``.grad`` attribute.
# 
# To stop a tensor from tracking history, you can call ``.detach()`` to detach
# it from the computation history, and to prevent future computation from being
# tracked.
# 
# To prevent tracking history (and using memory), you can also wrap the code block
# in ``with torch.no_grad():``. This can be particularly helpful when evaluating a
# model because the model may have trainable parameters with `requires_grad=True`,
# but we don't need the gradients.
# 
# There’s one more class which is very important for the autograd
# implementation - a ``Function``.
# 
# ``Tensor`` and ``Function`` are interconnected and build up an acyclic
# graph that encodes a complete history of computation. Each tensor has
# a ``.grad_fn`` attribute that references the ``Function`` that created
# the ``Tensor`` (except for Tensors created by the user - their
# ``grad_fn is None``).
# 
# If you want to compute the derivatives, you can call ``.backward()`` on
# a ``Tensor``. If the ``Tensor`` is a scalar (i.e. it holds one element of
# data), you don’t need to specify any arguments to ``backward()``;
# however, if it has more elements, you need to specify a ``gradient``
# argument that is a tensor of matching shape.
# 
# 

# In[ ]:


import torch
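

# As mentioned above, autograd is define-by-run: ordinary Python control flow
# decides what gets recorded, and only the branch that actually executes becomes
# part of the graph, so the graph can differ from one iteration to the next.
# A small illustrative sketch:
# 
# 

# In[ ]:


x = torch.randn(3, requires_grad=True)
# The condition below is plain Python control flow; autograd records only the
# branch that actually runs, so the recorded graph depends on the data.
y = x * 2 if x.sum() > 0 else x * 3
y.sum().backward()
print(x.grad)  # all 2s or all 3s, depending on which branch was taken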


# Create a tensor and set requires_grad=True to track computation with it
# 
# 

# In[ ]:


x = torch.ones(2, 2, requires_grad=True)
print(x)


# Do an operation on the tensor:
# 
# 

# In[ ]:


y = x + 2
print(y)


# ``y`` was created as a result of an operation, so it has a ``grad_fn``.
# 
# 

# In[ ]:


print(y.grad_fn)
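

# In contrast, ``x`` was created directly by the user rather than by an
# operation, so (assuming the cells above have been run) its ``grad_fn`` is
# ``None``.
# 
# 

# In[ ]:


print(x.grad_fn)  # None: x is a leaf Tensor created by the user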


# Do more operations on ``y``:
# 
# 

# In[ ]:


z = y * y * 3
out = z.mean()

print(z, out)


# ``.requires_grad_( ... )`` changes an existing Tensor's ``requires_grad``
# flag in-place. The input flag defaults to ``True`` if not given.
# 
# 

# In[ ]:


a = torch.randn(2, 2)
a = ((a * 3) / (a - 1))
print(a.requires_grad)
a.requires_grad_(True)
print(a.requires_grad)
b = (a * a).sum()
print(b.grad_fn)


# Gradients
# ---------
# Let's backprop now.
# Because ``out`` contains a single scalar, ``out.backward()`` is
# equivalent to ``out.backward(torch.tensor(1.))``.
# 
# 

# In[ ]:


out.backward()


# print gradients d(out)/dx
# 
# 
# 

# In[ ]:


print(x.grad)


# You should have gotten a matrix filled with ``4.5``. Let’s call the ``out``
# *Tensor* “$o$”.
# We have that $o = \frac{1}{4}\sum_i z_i$,
# $z_i = 3(x_i+2)^2$ and $z_i\bigr\rvert_{x_i=1} = 27$.
# Therefore,
# $\frac{\partial o}{\partial x_i} = \frac{3}{2}(x_i+2)$, hence
# $\frac{\partial o}{\partial x_i}\bigr\rvert_{x_i=1} = \frac{9}{2} = 4.5$.
# 
# 
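
# As a quick sanity check (a minimal sketch that assumes the cells above have
# been run), the gradient autograd computed can be compared against the
# analytic formula $\frac{3}{2}(x_i+2)$. Note also that gradients *accumulate*
# into ``.grad`` across ``backward()`` calls, which is why training loops
# typically zero them between iterations.
# 
# 

# In[ ]:


# Analytic gradient: d(o)/dx_i = 3/2 * (x_i + 2); with x_i = 1 this is 4.5
expected = 1.5 * (x.detach() + 2)
print(torch.allclose(x.grad, expected))  # prints: True
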

# You can do many crazy things with autograd!
# 
# 

# In[ ]:


x = torch.randn(3, requires_grad=True)

y = x * 2
# Keep doubling y while its norm is below 1000; .detach() is used so the norm
# check itself is not tracked by autograd.
while y.detach().norm() < 1000:
    y = y * 2

print(y)


# In[ ]:


# y is not a scalar, so backward() needs a gradient argument of matching shape
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)

print(x.grad)
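

# Since ``y`` was produced by repeatedly doubling ``x``, we have
# $y = 2^{k} x$ for some $k$, so ``y.backward(gradients)`` above computed the
# vector-Jacobian product ``gradients * 2**k``. Below is a small sanity check,
# assuming the cells above have been run.
# 
# 

# In[ ]:


scale = (y / x).detach()                          # elementwise 2 ** k
print(torch.allclose(x.grad, gradients * scale))  # prints: True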


# You can also stop autograd from tracking history on Tensors
# with ``requires_grad=True`` by wrapping the code block in
# ``with torch.no_grad():``
# 
# 

# In[ ]:


print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)
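

# Alternatively, you can use ``.detach()`` to get a new Tensor with the same
# content that does not require gradients (a minimal sketch, assuming ``x``
# from the cells above):
# 
# 

# In[ ]:


print(x.requires_grad)
y = x.detach()
print(y.requires_grad)   # False: y is detached from the graph
print(x.eq(y).all())     # tensor(True): same values as x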


# **Read Later:**
# 
# Documentation of ``autograd`` and ``Function`` is at
# http://pytorch.org/docs/autograd
# 
# 
